
library(basedosdados)
library(geobr)
library(xtable)

#This code computes table A1 directly from publicly available RAIS data hosted at Base dos Dados (https://basedosdados.org/)
setwd("~/Dropbox/Academic/Papers/Juizes Brasil")

basedosdados::set_billing_id("projeto-teste-302617")

query = "SELECT
  ano,
  AVG(CASE WHEN SUBSTR(cbo_2002, 1, 4) = '1113' THEN valor_remun_media_nominal ELSE NULL END) AS salary_judge,
  AVG(CASE WHEN SUBSTR(CAST(natureza_juridica AS STRING), 1, 3) IN ('101', '104', '107', '110', '113', '116') AND SUBSTR(cbo_2002, 1, 4) != '1113'
      THEN valor_remun_media_nominal ELSE NULL END) AS salary_public_federal,
  AVG(CASE WHEN SUBSTR(CAST(natureza_juridica AS STRING), 1, 1) = '2' THEN valor_remun_media_nominal ELSE NULL END) AS salary_private,
  AVG(CASE WHEN SUBSTR(cbo_2002, 1, 4) = '2410' THEN valor_remun_media_nominal ELSE NULL END) AS salary_lawyer,
  AVG(CASE WHEN cbo_2002 = '242235'             THEN valor_remun_media_nominal ELSE NULL END) AS salary_attorney,
  AVG(CASE WHEN cbo_2002 = '242405'             THEN valor_remun_media_nominal ELSE NULL END) AS salary_defender,
  AVG(CASE WHEN SUBSTR(cbo_2002, 1, 3) = '331'  THEN valor_remun_media_nominal ELSE NULL END) AS salary_teacher,
  AVG(CASE WHEN SUBSTR(cbo_2002, 1, 3) = '223'  THEN valor_remun_media_nominal ELSE NULL END) AS salary_health,
FROM `basedosdados.br_me_rais.microdados_vinculos`
WHERE ano >= 2003
GROUP BY ano
ORDER BY ano"

df = read_sql(query)

print(xtable(df,
             type = "latex",
             include.rownames=FALSE),
      file = "output/tables/tab_RAIS_wages_judges_vs_others.tex")


